Linear Regression

LinearRegression-model

LinearRegression-model

LinearRegression-estimated

LinearRegression-estimated

LinearRegression-error

LinearRegression-error

LinearRegression-OLS

LinearRegression-OLS

Linear Regression Example

# Load the auto data set; the path is relative to the current working directory.
myDataFrame <- read.csv("Data/regression_auto.csv")
head(myDataFrame)
##   make mpg weight weight1 price foreign repairs length
## 1  AMC  22   2930    2.93  4099       0       3    186
## 2  AMC  17   3350    3.35  4749       0       3    173
## 3  AMC  22   2640    2.64  3799       0       3    168
## 4 Audi  17   2830    2.83  9690       1       5    189
## 5 Audi  23   2070    2.07  6295       1       3    174
## 6  BMW  25   2650    2.65  9735       1       4    177

# Define variables
# The columns are looked up with with() rather than attach(): attach() leaves a
# copy of the data frame on the search path for the rest of the session, where
# its column names can mask (or be masked by) objects in the workspace.
# cbind() still receives bare column symbols, so the resulting one- and
# three-column matrices keep the column names mpg, weight1, price and foreign.
Y <- with(myDataFrame, cbind(mpg))                     # response
X1 <- with(myDataFrame, cbind(weight1))                # single predictor
X <- with(myDataFrame, cbind(weight1, price, foreign)) # all predictors

# Descriptive statistics
# summary() on a matrix reports min, quartiles, median and mean per column.
# Lines starting with "##" are the recorded console output of the call above.
summary(Y)
##       mpg       
##  Min.   :14.00  
##  1st Qu.:17.25  
##  Median :21.00  
##  Mean   :20.92  
##  3rd Qu.:23.00  
##  Max.   :35.00
summary(X1)
##     weight1     
##  Min.   :2.020  
##  1st Qu.:2.643  
##  Median :3.200  
##  Mean   :3.099  
##  3rd Qu.:3.610  
##  Max.   :4.330
summary(X)
##     weight1          price          foreign      
##  Min.   :2.020   Min.   : 3299   Min.   :0.0000  
##  1st Qu.:2.643   1st Qu.: 4466   1st Qu.:0.0000  
##  Median :3.200   Median : 5146   Median :0.0000  
##  Mean   :3.099   Mean   : 6652   Mean   :0.2692  
##  3rd Qu.:3.610   3rd Qu.: 8054   3rd Qu.:0.7500  
##  Max.   :4.330   Max.   :15906   Max.   :1.0000
# Correlation among variables
# Correlation of mpg with each column of X (cor() defaults to Pearson).
cor(Y, X)
##        weight1      price   foreign
## mpg -0.8081609 -0.4384618 0.4003376
# Plotting data on a scatter diagram
# Y and X1 are the matrices defined above, not columns of myDataFrame, so the
# formula picks them up from the calling environment; `data` is not what
# supplies them here.
plot(Y ~ X1, data = myDataFrame)

# Simple linear regression 
# Least-squares fit of mpg (Y) on weight1 (X1). Because the variables are
# passed as matrices, the slope is labelled "X1" in the output below.
simpleLinearRegression <- lm(Y ~ X1)
# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(simpleLinearRegression)
## 
## Call:
## lm(formula = Y ~ X1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4123 -1.6073 -0.1043  0.9261  8.1072 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  38.0665     2.6112  14.578 2.02e-13 ***
## X1           -5.5315     0.8229  -6.722 5.93e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.86 on 24 degrees of freedom
## Multiple R-squared:  0.6531, Adjusted R-squared:  0.6387 
## F-statistic: 45.19 on 1 and 24 DF,  p-value: 5.935e-07
# 95% confidence intervals for the intercept and slope
# (0.95 is also confint()'s default level).
confint(simpleLinearRegression, level=0.95)
##                 2.5 %    97.5 %
## (Intercept) 32.677256 43.455664
## X1          -7.229797 -3.833196
# Sums of squares split into the part explained by X1 and the residual part.
anova(simpleLinearRegression)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## X1         1 369.57  369.57  45.189 5.935e-07 ***
## Residuals 24 196.28    8.18                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plotting regression line
# abline() draws on the plot that is currently open, so this relies on the
# scatter plot from the earlier plot(Y ~ X1, ...) call still being active.
abline(simpleLinearRegression)

# Fitted mpg values implied by the simple model: extract them, summarise
# their distribution, then plot them against the single predictor X1.
predValuesDepVarY <- fitted.values(simpleLinearRegression)
summary(predValuesDepVarY)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.12   18.10   20.37   20.92   23.45   26.89
plot(predValuesDepVarY ~ X1)

# Residuals of the simple model (observed minus fitted mpg): extract them,
# summarise them, then plot them against X1.
regressionResiduals <- residuals(simpleLinearRegression)
summary(regressionResiduals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -5.4120 -1.6070 -0.1043  0.0000  0.9261  8.1070
plot(regressionResiduals ~ X1)

# Multiple linear regression
# Least-squares fit of mpg (Y) on the three-column matrix X (weight1, price,
# foreign). lm() treats the matrix as one term, so coefficient names carry an
# "X" prefix and anova() reports a single 3-df row for X instead of one row
# per predictor.
multipleLinearRegression <- lm(Y ~ X)
# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(multipleLinearRegression)
## 
## Call:
## lm(formula = Y ~ X)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6942 -1.1857 -0.0452  0.6433  8.6895 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 42.1661962  4.2647533   9.887 1.48e-09 ***
## Xweight1    -7.1211114  1.6046735  -4.438 0.000207 ***
## Xprice       0.0002258  0.0002654   0.851 0.404002    
## Xforeign    -2.5071265  2.0565685  -1.219 0.235723    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.89 on 22 degrees of freedom
## Multiple R-squared:  0.6752, Adjusted R-squared:  0.6309 
## F-statistic: 15.25 on 3 and 22 DF,  p-value: 1.374e-05
# 95% confidence intervals for every coefficient
# (0.95 is also confint()'s default level).
confint(multipleLinearRegression, level=0.95)
##                     2.5 %        97.5 %
## (Intercept)  3.332164e+01 51.0107531780
## Xweight1    -1.044900e+01 -3.7932221856
## Xprice      -3.245229e-04  0.0007760878
## Xforeign    -6.772188e+00  1.7579354345
# Sums of squares split into the part explained jointly by the columns of X
# and the residual part.
anova(multipleLinearRegression)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## X          3 382.08 127.360  15.247 1.374e-05 ***
## Residuals 22 183.77   8.353                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fitted mpg values implied by the multiple-regression model, with a summary
# of their distribution.
predValuesDepVarMultiY <- fitted.values(multipleLinearRegression)
summary(predValuesDepVarMultiY)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.90   17.91   20.46   20.92   23.99   27.89
# Residuals of the multiple-regression model (observed minus fitted mpg),
# with a summary of their distribution.
regressionResidualsMulti <- residuals(multipleLinearRegression)
summary(regressionResidualsMulti)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -4.69400 -1.18600 -0.04524  0.00000  0.64330  8.68900

Simple Regression Analysis

Linear Regression Simple

Multiple Regression Analysis

Linear Regression Multiple